---
title: "Main Models for Paper and SI"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tm)
library(dplyr)
library(webshot2)
library(lolog) 
library(network)
library(dotwhisker) 
library(magrittr) 
library(gridExtra) 
library(quanteda.textstats)
library(quanteda) 
library(igraph)
library(ggplot2)
library(ggpubr)
library(rstatix)
library(modelsummary)
library(kableExtra)
library(fixest)
library(coefplot)
library(ggplot2)
library(ggpubr)
library(tidyverse)
library(broom)

# Tell modelsummary to build its default, LaTeX and HTML tables with
# kableExtra (the tables below are written to .tex files).
options(
  modelsummary_factory_default = "kableExtra",
  modelsummary_factory_latex = "kableExtra",
  modelsummary_factory_html = "kableExtra"
)
```
# Load data
```{r}
# Main dataset; every table and figure below is derived from it.
# The path is relative to the directory the document is knitted from.
final_text <- read.csv("data/final_text.csv")
```
# Pre-process data
```{r}
# Build the analysis data sets used throughout the document:
#   firms         - rows of type 0 (firms) or 1 (industry associations)
#   rep_firm      - firms plus `count`, the number of rows per name_count
#   final_firm    - rep_firm with rescaled and logged variables
#   firm_size     - firms only (type 0), firm_size coerced to integer
#   firm_size_log - firm_size with log_size and a 2-character sector code,
#                   restricted to rows with a known firm size

# filter for firms and associations
firms <- final_text %>%
  filter(type == 0 | type == 1)

# Count rows per commenter. The grouping is dropped again right away so that
# every downstream object is a plain (ungrouped) table and later verbs do not
# silently operate per commenter.
rep_firm <- firms %>%
  group_by(name_count) %>%
  mutate(count = n()) %>%
  ungroup()

# Columns inside one mutate() are evaluated in order, so log_sim is computed
# from the already rescaled (x100) final_sim.
final_firm <- rep_firm %>%
  mutate(
    final_sim = final_sim * 100,
    log_count = log(count + 1),
    log_length = log(length + 1),
    log_words = log(word_lengths + 1),
    log_sim = log(final_sim + 1),
    matching = matching * 100
  )

# filter for only firms
firm_size <- final_firm %>%
  filter(type == "0")

# reclassify the firm_size variable as an integer (rather than character);
# values that cannot be parsed become NA and are dropped below
firm_size$firm_size <- as.integer(firm_size$firm_size)

# log firm size, shorten the sector code to its first two characters and keep
# only rows with a known firm size
firm_size_log <- firm_size %>%
  mutate(
    log_size = log(firm_size + 1),
    sector = substr(as.character(sector), 1, 2)
  ) %>%
  filter(!is.na(firm_size))

# store the size/length variables as numeric (double)
firm_size_log$length <- as.numeric(firm_size_log$length)
firm_size_log$firm_size <- as.numeric(firm_size_log$firm_size)
firm_size_log$word_lengths <- as.numeric(firm_size_log$word_lengths)
```

# Descriptive Stats: Figure 2, 3, 4, 5-16
```{r}
# Descriptive figures 2-16. Every figure is written to figures/.

# ggsave() does not create figures/ on its own when knitting, so make sure the
# folder exists before the first save.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

bar_fill <- "#3366cc"

# Draw a bar chart of the `count` column against the categorical column named
# by `x_var` (rotated x labels), save it to `file`, and return the plot
# invisibly.
save_category_bar <- function(data, x_var, file, y_label, x_label = NULL,
                              text_size = 16) {
  category_plot <- ggplot(data, aes(x = .data[[x_var]], y = count)) +
    geom_bar(stat = "identity", fill = bar_fill) +
    theme_classic() +
    labs(y = y_label, x = x_label) +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      axis.text = element_text(size = text_size),
      axis.title = element_text(size = 16),
      legend.position = "none"
    )
  ggsave(file, plot = category_plot)
  invisible(category_plot)
}

# Keep one row per commenter (name_count), count how many commenters share
# each value of `count`, plot that distribution, save it to `file`, and return
# the plot invisibly. `data` must contain name_count and count.
save_period_distribution <- function(data, file) {
  period_counts <- data %>%
    distinct(name_count, .keep_all = TRUE) %>%
    group_by(count) %>%
    summarize(count_firm = n())

  period_plot <- ggplot(period_counts, aes(x = count, y = count_firm)) +
    geom_bar(stat = "identity", fill = bar_fill) +
    theme_classic() +
    labs(y = "Number of Firms", x = "Number of Commenting Periods") +
    theme(
      axis.title.x = element_text(size = 20),
      axis.title.y = element_text(size = 20),
      axis.text.x = element_text(size = 16),
      axis.text.y = element_text(size = 16),
      plot.title = element_text(size = 20),
      legend.text = element_text(size = 16)
    )
  ggsave(file, plot = period_plot)
  invisible(period_plot)
}

# Figure 2A: number of firm rows per 2-character sector code

pie_sector <- firm_size_log %>%
  group_by(sector) %>%
  summarize(count = n()) %>%
  filter(sector != 0)

# Labels are matched to sector codes BY POSITION (codes sorted ascending), so
# the data must contain exactly these 20 codes.
# NOTE(review): a lookup keyed by sector code would be safer; the code-to-label
# mapping is not visible in this file, so the positional match is kept.
sector_names <- c(
  "Mining, Quarrying, and Oil and Gas", "Utilities", "Construction",
  "Manufacturing", "Manufacturing", "Manufacturing", "Wholesale Trade",
  "Retail Trade", "Retail Trade", "Transportation and Warehousing",
  "Information", "Finance and Insurance", "Real Estate",
  "Professional,\nScientific, and Technical Services",
  "Management of Companies and\nEnterprises",
  "Administrative", "Arts, Entertainment, and Recreation",
  "Accommodation and Food Services", "Other Services", "Public Administration"
)
stopifnot(nrow(pie_sector) == length(sector_names))

pie_sector$names <- sector_names

pie_sector <- pie_sector %>%
  filter(count > 20)

# Sector codes that share a label (e.g. "Manufacturing") are stacked into a
# single bar.
pie_2 <- save_category_bar(
  pie_sector, "names", "figures/figure2a.png",
  y_label = "Number of Firms", text_size = 14
)

# Figure 2B: number of firm rows per employee-count bin
# Bins are closed on the left; Inf makes the last bin catch every firm with
# 100,000 or more employees.
size_breaks <- c(0, 500, 1000, 5000, 10000, 50000, 100000, Inf)
size_labels <- c(
  "0-499", "500-999", "1,000-4,999", "5,000-9,999",
  "10,000-49,999", "50,000-99,999", "100,000+"
)

firm_size_bins <- firm_size_log %>%
  mutate(
    size = cut(
      firm_size,
      breaks = size_breaks, labels = size_labels, right = FALSE
    )
  )

pie_size <- firm_size_bins %>%
  group_by(size) %>%
  summarize(count = n()) %>%
  filter(!is.na(size))

pie_1 <- save_category_bar(
  pie_size, "size", "figures/figure2b.png",
  y_label = "Number of Firms", x_label = "Number of Employees"
)

# Figure 3: firms and associations together
num_periods <- save_period_distribution(rep_firm, "figures/figure3.png")

# Figure 4A: rows per commenter type
# Labels are matched by position to the sorted type codes that remain after
# the placeholder values are dropped (7 codes expected).
type_char <- c(
  "Firm", "Industry Association", "Civil Society", "Individual",
  "Government", "Academia", "IO"
)

type_comment <- final_text %>%
  group_by(type) %>%
  summarize(count = n()) %>%
  filter(type != "na", type != ".", type != "") %>%
  mutate(type_char = type_char)

pie_type <- save_category_bar(
  type_comment, "type_char", "figures/figure4a.png",
  y_label = "Number of Commenting Periods"
)

# Figure 4B: rows per country, dropping region-level entries and countries
# with fewer than 27 rows
country <- final_text %>%
  filter(
    country != "na",
    country != "Europe",
    country != "Asia",
    country != "Oceania"
  ) %>%
  group_by(country) %>%
  summarize(count = n()) %>%
  filter(count >= 27)

pie_country <- save_category_bar(
  country, "country", "figures/figure4b.png",
  y_label = "Number of Commenting Periods"
)

# distribution of iv for firms and associations separately

# Figure 5: associations only
num_assoc_periods <- rep_firm %>%
  filter(type == 1) %>%
  save_period_distribution("figures/figure5.png")

# Figure 6: firms only
num_firm_periods <- rep_firm %>%
  filter(type == 0) %>%
  save_period_distribution("figures/figure6.png")

# distribution of IV for top 5 sectors; included in appendix

# Figure 7: sector 54
prof_service_periods <- firm_size_log %>%
  filter(type == 0, sector == 54) %>%
  save_period_distribution("figures/figure7.png")

# Figure 8: sector 56
admin_periods <- firm_size_log %>%
  filter(type == 0, sector == 56) %>%
  save_period_distribution("figures/figure8.png")

# Figure 9: sector 55
mgmt_periods <- firm_size_log %>%
  filter(type == 0, sector == 55) %>%
  save_period_distribution("figures/figure9.png")

# Figure 10: sector 52
finance_periods <- firm_size_log %>%
  filter(type == 0, sector == 52) %>%
  save_period_distribution("figures/figure10.png")

# Figure 11: sector 32
manu_periods <- firm_size_log %>%
  filter(type == 0, sector == 32) %>%
  save_period_distribution("figures/figure11.png")

# distribution of iv for top 5 countries

# Figure 12
usa_periods <- firm_size_log %>%
  filter(country == "USA") %>%
  save_period_distribution("figures/figure12.png")

# Figure 13
uk_periods <- firm_size_log %>%
  filter(country == "UK") %>%
  save_period_distribution("figures/figure13.png")

# Figure 14
france_periods <- firm_size_log %>%
  filter(country == "France") %>%
  save_period_distribution("figures/figure14.png")

# Figure 15
germany_periods <- firm_size_log %>%
  filter(country == "Germany") %>%
  save_period_distribution("figures/figure15.png")

# Figure 16
neth_periods <- firm_size_log %>%
  filter(country == "Netherlands") %>%
  save_period_distribution("figures/figure16.png")
```

# Table 2: Main Models
```{r}
# Table 2: main models for (1) firms and associations and (2) firms only.

# All tables are written to tables/; create the folder if it is missing.
dir.create("tables", showWarnings = FALSE, recursive = TRUE)

# Add the three binary comment-quality controls:
#   unique     = 1 when max_cos <= 0.5
#   positive   = 1 when sentiment >= 0
#   difficulty = 1 when readability <= 50
add_comment_flags <- function(data) {
  data %>%
    mutate(
      unique = ifelse(max_cos <= 0.5, 1, 0),
      positive = ifelse(sentiment >= 0, 1, 0),
      difficulty = ifelse(readability <= 50, 1, 0)
    )
}

# firm_fe is reused by the Table 8 and Table 9 chunks.
firm_fe <- add_comment_flags(final_firm)
firm_only <- add_comment_flags(firm_size_log)

# Firms + associations: country and industry fixed effects.
unique_lm5 <- feols(
  final_sim ~ log_count + unique + positive + difficulty + log_words |
    country + industry,
  data = firm_fe
)
# Firms only: adds firm size and uses sector instead of industry.
unique_lm5_firms <- feols(
  final_sim ~ log_count + unique + positive + difficulty + log_words +
    log_size | country + sector,
  data = firm_only
)

# Display labels (and row order) for the coefficients
cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "unique" = "Unique Comment",
  "positive" = "Tone of Comment",
  "difficulty" = "Difficulty of Comment",
  "log_words" = "Length of Comment",
  "log_size" = "Firm Size"
)

gof <- c("nobs", "FE: country", "FE: industry", "FE: sector")

table1b <- list(unique_lm5, unique_lm5_firms)

modelsummary(
  table1b,
  output = "tables/table2.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```

# Table 4: Firm Size
```{r}
# Firm-size models on the firms-only data: controls enter stepwise, then
# country and sector fixed effects are absorbed one at a time.
mod1 <- feols(final_sim ~ log_size, data = firm_size_log)
mod2 <- feols(final_sim ~ log_size + log_count, data = firm_size_log)
mod3 <- feols(
  final_sim ~ log_size + log_count + log_words,
  data = firm_size_log
)
mod5 <- feols(
  final_sim ~ log_size + log_count + log_words | country,
  data = firm_size_log
)
mod6 <- feols(
  final_sim ~ log_size + log_count + log_words | sector,
  data = firm_size_log
)

models_firm <- list(mod1, mod2, mod3, mod5, mod6)
etable(models_firm)  # quick look in the knitted output

# Display labels for the coefficient rows
cm <- c(
  "(Intercept)" = "Constant",
  "log_size" = "Firm Size",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Length of Comment"
)

# Goodness-of-fit rows to keep
gof <- c("nobs", "FE: country", "FE: sector")

modelsummary(
  models_firm,
  output = "tables/table4.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```

# Table 5: Backlash Mechanism
```{r}
# Table 5: bivariate OLS of similarity on an indicator for the two dates
# flagged below (labelled "Action 13" in the table).
# NOTE(review): the date codes are compared as strings exactly as stored in
# the data; confirm their format against data/final_text.csv.
action_13 <- final_firm %>%
  mutate(action13 = ifelse(date == "2014223" | date == "202036", 1, 0))

mod_13 <- lm(final_sim ~ action13, data = action_13)

cm <- c("action13" = "Action 13 Dummy Variable")

# Define the goodness-of-fit rows here instead of relying on whatever `gof`
# an earlier chunk left behind. This is a plain lm() without fixed effects,
# so the number of observations is the only row that applies.
gof <- "nobs"

tab <- modelsummary(
  mod_13,
  output = "tables/table5.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```

# Table 6
```{r}
# Stepwise models on firms and associations: bivariate, plus comment length,
# then industry, country, and both sets of fixed effects.
mod_feols1 <- feols(final_sim ~ log_count, data = final_firm)
mod_feols2 <- feols(final_sim ~ log_count + log_words, data = final_firm)
mod_feols3 <- feols(
  final_sim ~ log_count + log_words | industry,
  data = final_firm
)
mod_feols4 <- feols(
  final_sim ~ log_count + log_words | country,
  data = final_firm
)
mod_feols5 <- feols(
  final_sim ~ log_count + log_words | industry + country,
  data = final_firm
)

# Display labels; entries without a matching coefficient are simply unused
cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Length of Comment",
  "log_size" = "Firm Size"
)

gof <- c("nobs", "FE: country", "FE: industry", "FE: sector")

main_mods_list <- list(
  mod_feols1, mod_feols2, mod_feols3, mod_feols4, mod_feols5
)
etable(main_mods_list)

# Only the 0.001 significance level is starred in this table.
modelsummary(
  main_mods_list,
  output = "tables/table6.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = c("***" = .001),
  gof_map = gof
)
```

# Table 7
```{r}
# Firms-only counterpart of the stepwise models, adding firm size and using
# sector (rather than industry) fixed effects.
mod1 <- feols(final_sim ~ log_count, data = firm_size_log)
mod2 <- feols(final_sim ~ log_count + log_size, data = firm_size_log)
mod3 <- feols(
  final_sim ~ log_count + log_size + log_words | country,
  data = firm_size_log
)
mod4 <- feols(
  final_sim ~ log_count + log_size + log_words | sector,
  data = firm_size_log
)
mod5 <- feols(
  final_sim ~ log_count + log_size + log_words | sector + country,
  data = firm_size_log
)

models_firm <- list(mod1, mod2, mod3, mod4, mod5)
etable(models_firm)

# Display labels for the coefficient rows
cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_size" = "Firm Size",
  "log_words" = "Length of Comment"
)

gof <- c("nobs", "FE: country", "FE: sector")

modelsummary(
  models_firm,
  output = "tables/table7.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```
# Table 8
```{r}
# Comment-quality models (no log_count): the three quality flags, then
# comment length, then industry / country / both fixed effects.
# Uses firm_fe, which is built in the Table 2 chunk.
unique_lm1 <- feols(
  final_sim ~ unique + positive + difficulty,
  data = firm_fe
)
unique_lm2 <- feols(
  final_sim ~ unique + positive + difficulty + log_words,
  data = firm_fe
)
unique_lm3 <- feols(
  final_sim ~ unique + positive + difficulty + log_words | industry,
  data = firm_fe
)
unique_lm4 <- feols(
  final_sim ~ unique + positive + difficulty + log_words | country,
  data = firm_fe
)
unique_lm5 <- feols(
  final_sim ~ unique + positive + difficulty + log_words |
    country + industry,
  data = firm_fe
)

unique_comments_all <- list(
  unique_lm1, unique_lm2, unique_lm3, unique_lm4, unique_lm5
)

# Display labels; entries without a matching coefficient are simply unused
cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "unique" = "Unique Comment",
  "positive" = "Tone of Comment",
  "difficulty" = "Difficulty of Comment",
  "log_words" = "Length of Comment",
  "log_size" = "Firm Size"
)

gof <- c("nobs", "FE: country", "FE: industry")

modelsummary(
  unique_comments_all,
  output = "tables/table8.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```

# Table 9: Only Quality Comments
```{r}
# Restrict to comments that satisfy all three quality flags and re-run the
# stepwise models, to check whether the reputational argument still holds
# within that subset. Uses firm_fe from the Table 2 chunk.
firm_filter <- firm_fe %>%
  filter(difficulty == 1, positive == 1, unique == 1)

mod_feols1 <- feols(final_sim ~ log_count, data = firm_filter)
mod_feols2 <- feols(final_sim ~ log_count + log_words, data = firm_filter)
mod_feols3 <- feols(
  final_sim ~ log_count + log_words | industry,
  data = firm_filter
)
mod_feols4 <- feols(
  final_sim ~ log_count + log_words | country,
  data = firm_filter
)
mod_feols5 <- feols(
  final_sim ~ log_count + log_words | industry + country,
  data = firm_filter
)

list_informative <- list(
  mod_feols1, mod_feols2, mod_feols3, mod_feols4, mod_feols5
)

# Display labels for the coefficient rows
cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Number of Words (Logged)"
)

gof <- c("nobs", "FE: country", "FE: industry")

modelsummary(
  list_informative,
  output = "tables/table9.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```
# Table 10: Cumulative Measure
```{r}
# Table 10: cumulative participation measure.
#
# Highest comment_number in the data the panel is built from. na.rm keeps a
# single missing comment_number from turning the whole sequence into NA.
max_period <- max(final_firm$comment_number, na.rm = TRUE)

# For every commenter, add a placeholder row for each comment_number in
# 1..max_period that has no observed row (all other columns, including year,
# are NA there). Rows come back ordered by comment_number within commenter,
# so the running sum counts the commenter's observed rows up to and including
# the current one. Placeholder rows are dropped again afterwards.
firm_cumulative <- final_firm %>%
  group_by(name_count) %>%
  complete(comment_number = seq_len(max_period)) %>%
  mutate(
    participated = ifelse(!is.na(year), 1, 0),
    cum_comments = cumsum(participated)
  ) %>%
  ungroup() %>%
  filter(!is.na(year)) %>%
  mutate(
    log_cum = log(cum_comments),
    # one_time flags commenters with a single row overall; it is used by the
    # Table 11 chunk
    one_time = ifelse(count == 1, 1, 0)
  )

# if a firm has more cumulative comments in a certain comment period, more
# likely to have an influence
lm_cum1 <- feols(final_sim ~ log_cum | comment_number, data = firm_cumulative)

cm <- c(
  "(Intercept)" = "Constant",
  "log_cum" = "Number of Commenting Periods"
)

gof <- c("nobs", "FE: comment_number")

# Only the 0.001 significance level is starred in this table.
modelsummary(
  lm_cum1,
  output = "tables/table10.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = c("***" = .001),
  gof_map = gof
)
```

# Table 11: One Time Commenters
```{r}
# Do one-time commenters have less impact? Regress similarity on the one_time
# flag built in the Table 10 chunk, adding comment length and fixed effects
# stepwise.
lm1 <- feols(final_sim ~ one_time, data = firm_cumulative)
lm2 <- feols(final_sim ~ one_time + log_words, data = firm_cumulative)
lm3 <- feols(
  final_sim ~ one_time + log_words | industry,
  data = firm_cumulative
)
lm4 <- feols(
  final_sim ~ one_time + log_words | country,
  data = firm_cumulative
)
lm5 <- feols(
  final_sim ~ one_time + log_words | industry + country,
  data = firm_cumulative
)

# Display labels for the coefficient rows
cm <- c(
  "(Intercept)" = "Constant",
  "one_time" = "One Time Commenters",
  "log_words" = "Comment Length"
)

gof <- c("nobs", "FE: industry", "FE: country")

one_time_mods <- list(lm1, lm2, lm3, lm4, lm5)

# Only the 0.001 significance level is starred in this table.
modelsummary(
  one_time_mods,
  output = "tables/table11.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = c("***" = .001),
  gof_map = gof
)
```

# Table 13: Hand Coding and Cosine Similarity
```{r}
# Validation data: put the similarity score on the same 0-100 scale as
# final_sim in the main data. hand_coding_clean is reused by the Figure 17
# and Table 20 chunks.
hand_coding_clean <- read.csv("data/hand_coding.csv") %>%
  mutate(final_sim = sim * 100)

# Regress the hand-coded score (`all`) on the similarity score. As in the
# original script, `hand_coding` ends up holding the fitted model.
hand_coding <- lm(all ~ final_sim, data = hand_coding_clean)

cm <- c(
  "(Intercept)" = "Constant",
  "final_sim" = "Cosine Similarity Score"
)

# Drop information criteria, log-likelihood and adjusted R2 from the footer.
tab <- modelsummary(
  hand_coding,
  output = "tables/table13.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 3,
  stars = TRUE,
  gof_omit = "IC|Log|Adj"
)
```

# Figure 17: Correlation between Hand Coding and Cosine Similarity
```{r}
# Pearson correlation between the hand-coded score and the raw similarity
# score; shown (rounded to two decimals) in the plot title.
correlation <- with(hand_coding_clean, cor(all, sim))

# Scatter plot with a fitted least-squares line (no confidence band).
validation <- ggplot(hand_coding_clean, aes(x = all, y = sim)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  theme_classic() +
  labs(
    x = "Hand Coding",
    y = "Cosine Similarity Score",
    title = paste("Correlation =", round(correlation, 2))
  )

ggsave("figures/figure17.png", plot = validation)
```

# Table 14 and 15: Jaccard Scores
```{r}
# Table 14: same stepwise design as the main models, with `matching`
# (rescaled x100 during pre-processing) as the outcome; firms and associations.

mod_feols1_match <- feols(matching ~ log_count, data = final_firm)
mod_feols2_match <- feols(matching ~ log_count + log_words, data = final_firm)
# NOTE(review): unlike models 2, 4 and 5, this specification has no log_words
# control - confirm that the omission is intended.
mod_feols3_match <- feols(matching ~ log_count | country, data = final_firm)
mod_feols4_match <- feols(
  matching ~ log_count + log_words | industry,
  data = final_firm
)
mod_feols5_match <- feols(
  matching ~ log_count + log_words | country + industry,
  data = final_firm
)

match <- list(
  mod_feols1_match, mod_feols2_match, mod_feols3_match,
  mod_feols4_match, mod_feols5_match
)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Length of Comment"
)

gof <- c("nobs", "FE: country", "FE: industry")

modelsummary(
  match,
  output = "tables/table14.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)

# Table 15: firms only, adding firm size and sector fixed effects

mod1_match <- feols(matching ~ log_count, data = firm_size_log)
mod2_match <- feols(matching ~ log_count + log_size, data = firm_size_log)
mod3_match <- feols(
  matching ~ log_count + log_size + log_words,
  data = firm_size_log
)
mod5_match <- feols(
  matching ~ log_count + log_size + log_words | country,
  data = firm_size_log
)
mod6_match <- feols(
  matching ~ log_count + log_size + log_words | sector,
  data = firm_size_log
)

models_firm <- list(
  mod1_match, mod2_match, mod3_match, mod5_match, mod6_match
)

gof <- c("nobs", "FE: country", "FE: sector")

cm_firm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_size" = "Firm Size",
  "log_words" = "Length of Comment"
)

modelsummary(
  models_firm,
  output = "tables/table15.tex",
  coef_omit = "Intercept",
  coef_map = cm_firm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```
# Table 16-19: Expertise
```{r}
# Tables 16-19: expertise checks.

# Commenter names excluded in Tables 18 and 19.
big4_names <- c("Deloitte", "PWC", "KPMG", "EY")

# Drop the rows of the four names above. Rows with a missing name_count are
# dropped as well, matching what a chain of `name_count != "..."` filters does.
drop_big4 <- function(data) {
  data %>%
    filter(!is.na(name_count), !name_count %in% big4_names)
}

# Table 16: is similarity higher for firms in sector 54?
most_likely_industry <- firm_size_log %>%
  mutate(most_likely = ifelse(sector == "54", 1, 0))

expertise_simple <- feols(final_sim ~ most_likely, data = most_likely_industry)
expertise_mod <- feols(
  final_sim ~ most_likely | country,
  data = most_likely_industry
)

# Named so that it does not shadow base::list().
sector_expertise_mods <- list(expertise_simple, expertise_mod)

cm <- c(
  "(Intercept)" = "Constant",
  "most_likely" = "Professional and Technical Services Sector"
)

gof <- c("nobs", "FE: country")

modelsummary(
  sector_expertise_mods,
  output = "tables/table16.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)

# Table 17: main relationship within sector 54 only

filter_industry <- most_likely_industry %>%
  filter(most_likely == 1)

expertise_simple <- feols(final_sim ~ log_count, data = filter_industry)
expertise <- feols(
  final_sim ~ log_count + log_size + log_words,
  data = filter_industry
)
expertise_fe <- feols(
  final_sim ~ log_count + log_size + log_words | country,
  data = filter_industry
)
expertise_mods <- list(expertise_simple, expertise, expertise_fe)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_size" = "Firm Size",
  "log_words" = "Length of Comment"
)

gof <- c("nobs", "FE: country")

modelsummary(
  expertise_mods,
  output = "tables/table17.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)

# Table 18: firms and associations with the four names above removed
big4 <- drop_big4(final_firm)

big4_feols1 <- feols(final_sim ~ log_count, data = big4)
big4_feols2 <- feols(final_sim ~ log_count + log_words, data = big4)
big4_feols3 <- feols(final_sim ~ log_count + log_words | country, data = big4)
big4_feols4 <- feols(final_sim ~ log_count + log_words | industry, data = big4)
big4_feols5 <- feols(
  final_sim ~ log_count + log_words | country + industry,
  data = big4
)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Length of Comment"
)

# From here on `big4` holds the fitted models, as in the original script.
big4 <- list(big4_feols1, big4_feols2, big4_feols3, big4_feols4, big4_feols5)

gof <- c("nobs", "FE: country", "FE: industry")

modelsummary(
  big4,
  output = "tables/table18.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)

# Table 19: firms only with the four names above removed

big4_firm <- drop_big4(firm_size_log)

mod1_big4firm <- feols(final_sim ~ log_count, data = big4_firm)
mod2_big4firm <- feols(final_sim ~ log_count + log_size, data = big4_firm)
mod3_big4firm <- feols(
  final_sim ~ log_count + log_size + log_words,
  data = big4_firm
)
mod5_big4firm <- feols(
  final_sim ~ log_count + log_size + log_words | country,
  data = big4_firm
)
mod6_big4firm <- feols(
  final_sim ~ log_count + log_size + log_words | sector,
  data = big4_firm
)

models_big4_firm <- list(
  mod1_big4firm, mod2_big4firm, mod3_big4firm, mod5_big4firm, mod6_big4firm
)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_size" = "Firm Size",
  "log_words" = "Length of Comment"
)

gof <- c("nobs", "FE: country", "FE: sector")

modelsummary(
  models_big4_firm,
  output = "tables/table19.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```

# Table 20: Summary
```{r}
# Table 20: regress the similarity score on the `summary` column of the
# hand-coded validation data (hand_coding_clean comes from the Table 13 chunk).
# The model is stored as summary_mod so that it does not shadow base::summary().
summary_mod <- lm(final_sim ~ summary, data = hand_coding_clean)
# Original author's note: having a short summary does not increase the
# likelihood that a comment is successful.
# NOTE(review): the original note was cut off mid-sentence - confirm wording.

cm_summary <- c(
  "(Intercept)" = "Constant",
  "summary" = "Summary of Comment"
)

tab_summary <- modelsummary(
  summary_mod,
  output = "tables/table20.tex",
  coef_omit = "Intercept",
  coef_map = cm_summary,
  fmt = 3,
  stars = TRUE
)
```

# Table 21 and 22: Logged Models
```{r}
# Table 21: stepwise models with the logged outcome (log_sim); firms and
# associations.
log_mod_feols1 <- feols(log_sim ~ log_count, data = final_firm)
log_mod_feols2 <- feols(log_sim ~ log_count + log_words, data = final_firm)
log_mod_feols3 <- feols(
  log_sim ~ log_count + log_words | industry,
  data = final_firm
)
log_mod_feols4 <- feols(
  log_sim ~ log_count + log_words | country,
  data = final_firm
)
log_mod_feols5 <- feols(
  log_sim ~ log_count + log_words | country + industry,
  data = final_firm
)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_words" = "Length of Comment"
)

log_mods_list <- list(
  log_mod_feols1, log_mod_feols2, log_mod_feols3,
  log_mod_feols4, log_mod_feols5
)

gof <- c("nobs", "FE: country", "FE: industry")

modelsummary(
  log_mods_list,
  output = "tables/table21.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)

# Table 22: logged outcome, firms only, with firm size and sector effects

mod1_log <- feols(log_sim ~ log_count, data = firm_size_log)
mod2_log <- feols(log_sim ~ log_count + log_size, data = firm_size_log)
mod3_log <- feols(
  log_sim ~ log_count + log_size + log_words,
  data = firm_size_log
)
mod5_log <- feols(
  log_sim ~ log_count + log_size + log_words | country,
  data = firm_size_log
)
mod6_log <- feols(
  log_sim ~ log_count + log_size + log_words | sector,
  data = firm_size_log
)

models_firm <- list(mod1_log, mod2_log, mod3_log, mod5_log, mod6_log)

cm <- c(
  "(Intercept)" = "Constant",
  "log_count" = "Number of Commenting Periods",
  "log_size" = "Firm Size",
  "log_words" = "Length of Comment"
)

gof <- c("nobs", "FE: country", "FE: sector")

modelsummary(
  models_firm,
  output = "tables/table22.tex",
  coef_omit = "Intercept",
  coef_map = cm,
  fmt = 2,
  stars = TRUE,
  gof_map = gof
)
```